{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import numpy as np\n",
    "import math\n",
    "from operator import itemgetter\n",
    "\n",
    "\n",
    "# 近邻数目最多为20\n",
    "K=20  \n",
    "\n",
    "# 推荐物品数目最多为10\n",
    "N=10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=[\n",
    "    ['A','b'],\n",
    "    ['A','d'],\n",
    "    ['B','a'],\n",
    "    ['B','b'],\n",
    "    ['B','c'],\n",
    "    ['C','a'],\n",
    "    ['C','b'],\n",
    "    ['C','d'],\n",
    "    ['D','a'],\n",
    "    ['D','e']\n",
    "]\n",
    "data=np.array(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "# user->item的映射\n",
    "# key：user\n",
    "# value：item的set\n",
    "train_data = {}\n",
    "\n",
    "for user, item in data:\n",
    "    train_data.setdefault(user,set())\n",
    "    train_data[user].add(item)\n",
    "\n",
    "# 用户数量和物品数量\n",
    "n_users = len(list(set(data[:,0])))\n",
    "n_items = len(list(set(data[:,1])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'b', 'd'}, 'B': {'a', 'b', 'c'}, 'C': {'a', 'b', 'd'}, 'D': {'a', 'e'}}"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'d': 2, 'b': 3, 'c': 1, 'a': 3, 'e': 1}"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算每个物品被用户评分的个数\n",
    "item_cnt = {}\n",
    "for user, items in train_data.items():\n",
    "    for i in items:\n",
    "        # count item popularity\n",
    "        item_cnt.setdefault(i,0)\n",
    "        item_cnt[i] += 1\n",
    "item_cnt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    " # 计算物品之间共同评分的物品数,C为修正后的，count为修正前的。\n",
    "C = dict()\n",
    "count=dict()\n",
    "for user, items in train_data.items():\n",
    "    for u in items:\n",
    "        for v in items:\n",
    "            if u == v:\n",
    "                continue\n",
    "            C.setdefault(u,{})\n",
    "            C[u].setdefault(v,0)\n",
    "            C[u][v] += math.log(n_items/len(items))\n",
    "\n",
    "            count.setdefault(u, {})\n",
    "            count[u].setdefault(v, 0)\n",
    "            count[u][v] += 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'d': {'b': 1.4271163556401458, 'a': 0.5108256237659907},\n",
       " 'b': {'d': 1.4271163556401458,\n",
       "  'c': 0.5108256237659907,\n",
       "  'a': 1.0216512475319814},\n",
       " 'c': {'a': 0.5108256237659907, 'b': 0.5108256237659907},\n",
       " 'a': {'c': 0.5108256237659907,\n",
       "  'b': 1.0216512475319814,\n",
       "  'd': 0.5108256237659907,\n",
       "  'e': 0.9162907318741551},\n",
       " 'e': {'a': 0.9162907318741551}}"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "C"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'d': {'b': 2, 'a': 1},\n",
       " 'b': {'d': 2, 'c': 1, 'a': 2},\n",
       " 'c': {'a': 1, 'b': 1},\n",
       " 'a': {'c': 1, 'b': 2, 'd': 1, 'e': 1},\n",
       " 'e': {'a': 1}}"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算原始的余弦相似度\n",
    "def calCosineSimi():\n",
    "    item_sim = dict()\n",
    "    for u, related_items in C.items():\n",
    "        item_sim[u]={}\n",
    "        for v, cuv in related_items.items():\n",
    "            item_sim[u][v] = count[u][v] / math.sqrt(item_cnt[u] * item_cnt[v])\n",
    "    return item_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算修正后的余弦相似度\n",
    "def calCorrectionCosineSimi():\n",
    "    item_sim = dict()\n",
    "    for u, related_items in C.items():\n",
    "        item_sim[u]={}\n",
    "        for v, cuv in related_items.items():\n",
    "            item_sim[u][v] = cuv / math.sqrt(item_cnt[u] * item_cnt[v])\n",
    "    return item_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算条件概率相似度\n",
    "def calConditionalProbabilitySimi():\n",
    "    item_sim = dict()\n",
    "    for u, related_items in C.items():\n",
    "        item_sim[u]={}\n",
    "        for v, cuv in related_items.items():\n",
    "            item_sim[u][v] = count[u][v] / (item_cnt[u])\n",
    "    return item_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(item_sim, user):\n",
    "    rank = dict()\n",
    "    interacted_items = train_data[user]\n",
    "\n",
    "    # 对每个评分的物品寻找最近K个物品，构建评分列表\n",
    "    for item in interacted_items:\n",
    "        for similar_item, similarity_factor in sorted(item_sim[item].items(),\n",
    "                                                       key=itemgetter(1), reverse=True)[:K]:\n",
    "            if similar_item in interacted_items:\n",
    "                continue\n",
    "            rank.setdefault(similar_item, 0)\n",
    "            rank[similar_item] += similarity_factor\n",
    "\n",
    "    rec_list = []\n",
    "    rec_items = sorted(rank.items(), key=itemgetter(1), reverse=True)[0:N]\n",
    "    for item, score in rec_items:\n",
    "        rec_list.append([item,score])\n",
    "\n",
    "    # 返回最大N个物品\n",
    "    return rec_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'d': {'b': 0.8164965809277261, 'a': 0.4082482904638631},\n",
       " 'b': {'d': 0.8164965809277261,\n",
       "  'c': 0.5773502691896258,\n",
       "  'a': 0.6666666666666666},\n",
       " 'c': {'a': 0.5773502691896258, 'b': 0.5773502691896258},\n",
       " 'a': {'c': 0.5773502691896258,\n",
       "  'b': 0.6666666666666666,\n",
       "  'd': 0.4082482904638631,\n",
       "  'e': 0.5773502691896258},\n",
       " 'e': {'a': 0.5773502691896258}}"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 余弦相似度\n",
    "item_sim1=calCosineSimi()\n",
    "item_sim1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'d': {'b': 0.582617812483108, 'a': 0.2085436876276022},\n",
       " 'b': {'d': 0.582617812483108,\n",
       "  'c': 0.29492531139025324,\n",
       "  'a': 0.3405504158439938},\n",
       " 'c': {'a': 0.29492531139025324, 'b': 0.29492531139025324},\n",
       " 'a': {'c': 0.29492531139025324,\n",
       "  'b': 0.3405504158439938,\n",
       "  'd': 0.2085436876276022,\n",
       "  'e': 0.5290207007035027},\n",
       " 'e': {'a': 0.5290207007035027}}"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 修正后的余弦相似度\n",
    "item_sim2=calCorrectionCosineSimi()\n",
    "item_sim2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'d': {'b': 1.0, 'a': 0.5},\n",
       " 'b': {'d': 0.6666666666666666,\n",
       "  'c': 0.3333333333333333,\n",
       "  'a': 0.6666666666666666},\n",
       " 'c': {'a': 1.0, 'b': 1.0},\n",
       " 'a': {'c': 0.3333333333333333,\n",
       "  'b': 0.6666666666666666,\n",
       "  'd': 0.3333333333333333,\n",
       "  'e': 0.3333333333333333},\n",
       " 'e': {'a': 1.0}}"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 条件概率相似度\n",
    "item_sim3=calConditionalProbabilitySimi()\n",
    "item_sim3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给 A 推荐： [['a', 1.0749149571305296], ['c', 0.5773502691896258]]\n",
      "给 B 推荐： [['d', 1.2247448713915892], ['e', 0.5773502691896258]]\n",
      "给 C 推荐： [['c', 1.1547005383792517], ['e', 0.5773502691896258]]\n",
      "给 D 推荐： [['b', 0.6666666666666666], ['c', 0.5773502691896258], ['d', 0.4082482904638631]]\n"
     ]
    }
   ],
   "source": [
    "# 使用余弦相似度进行推荐\n",
    "for user in ['A','B','C','D']:\n",
    "        rec_list=predict(item_sim1,user)\n",
    "        print(\"给\",user,\"推荐：\",rec_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给 A 推荐： [['a', 0.549094103471596], ['c', 0.29492531139025324]]\n",
      "给 B 推荐： [['d', 0.7911615001107102], ['e', 0.5290207007035027]]\n",
      "给 C 推荐： [['c', 0.5898506227805065], ['e', 0.5290207007035027]]\n",
      "给 D 推荐： [['b', 0.3405504158439938], ['c', 0.29492531139025324], ['d', 0.2085436876276022]]\n"
     ]
    }
   ],
   "source": [
    "# 使用修正后的余弦相似度进行推荐\n",
    "for user in ['A','B','C','D']:\n",
    "        rec_list=predict(item_sim2,user)\n",
    "        print(\"给\",user,\"推荐：\",rec_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给 A 推荐： [['a', 1.1666666666666665], ['c', 0.3333333333333333]]\n",
      "给 B 推荐： [['d', 1.0], ['e', 0.3333333333333333]]\n",
      "给 C 推荐： [['c', 0.6666666666666666], ['e', 0.3333333333333333]]\n",
      "给 D 推荐： [['b', 0.6666666666666666], ['c', 0.3333333333333333], ['d', 0.3333333333333333]]\n"
     ]
    }
   ],
   "source": [
    "# 使用条件概率进行推荐\n",
    "for user in ['A','B','C','D']:\n",
    "        rec_list=predict(item_sim3,user)\n",
    "        print(\"给\",user,\"推荐：\",rec_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
